## BART model from
## Jennifer L. Hill (2011) Bayesian Nonparametric Modeling for Causal Inference, 
## Journal of Computational and Graphical Statistics, 20:1, 217-240, 
## DOI: 10.1198/jcgs.2010.08162

dgp_exp_pop <- function(total_size,
                        K, K_W, 
                        seed_use = 1234, sim_index  = 1, sd_Y = 0.1,
                        outcome_type = "lm"){
  
  set.seed(seed_use)
  coef_0 <- rnorm(n = K+1)
  
  if(outcome_type == "lm") {
    coef_tau = rep(1, K_W)
    coef_select = rep(1, K_W)
  }
  
  if(outcome_type == "bart") {
    coef_0_k <- coef_0[(K_W + 1):K]
    coef_0 <-sample(c(0, 0.1, 0.2, 0.3, 0.4)*2, size = K_W, replace = TRUE, prob = c(0.2, 0.2, 0.2, 0.2, 0.2))
    coef_select = rep(1, K_W)
  }
  
  set.seed(sim_index)
  X  <- mvrnorm(n = total_size, mu = rep(0, K), Sigma = diag(K))
  X_W  <- X[, 1:K_W]
  
  # Selection Model
  select_prob  <- inv.logit(X_W %*% coef_select)
  select_var <- rbinom(n = total_size, size = 1, prob = select_prob)
  
  if(outcome_type == "lm") {
    # Potential Outcomes (for both experimental and population data)
    Y_0 <- cbind(1, X) %*% coef_0 +  rnorm(n = total_size, sd = sd_Y)
    tau <- X_W %*% coef_tau + rnorm(n = total_size, sd = sd_Y)
    Y_1 <- Y_0 + tau
  }
  
  if(outcome_type == "bart") {
    Y_0 <- exp(cbind(X_W + matrix(0.5, nrow = nrow(X), ncol = K_W)) %*% coef_0) + 
      X[, (K_W + 1):K] %*% coef_0_k + rnorm(n = total_size, sd = sd_Y)
    Y_1 <- X_W %*% coef_0 + rnorm(n = total_size, sd = sd_Y)
  }
  
  
  # Population Data
  pop_data <-  cbind(Y_1, Y_0, X)[select_var == 0, ]
  colnames(pop_data) <- c("Y_1", "Y_0",
                          paste0("X_",seq(from  = 1, to = ncol(X))))
  
  # Experimental Data
  exp_size  <- sum(select_var)
  
  ## Make Clusters
  treatment_assignment <- sample(1:exp_size, size = floor(exp_size/2), replace = FALSE)
  
  treatment <- rep(0, exp_size)
  treatment[treatment_assignment] <- 1
  
  # observe outcomes
  Y <- treatment*Y_1[select_var == 1] + (1-treatment)*Y_0[select_var == 1]
  
  # experimental data
  exp_data <- cbind(Y, Y_1[select_var == 1], Y_0[select_var == 1],
                    treatment, X[select_var == 1,])
  colnames(exp_data) <- c("Y", "Y_1", "Y_0",
                          "treatment", paste0("X_",seq(from  = 1, to = ncol(X))))
  
  return(list(pop_data =  pop_data,  exp_data =  exp_data))
  
}